# Replication code for: Bauer, Paul C. 2015. “Negative Experiences and Trust: A Causal Analysis of the Effects of Victimization on Generalized Trust.” European sociological review 31(4): 397–417.



################################################################################
############## START HERE WHEN USING ORIGINAL SHP FILES ########################
################################################################################
# You need to sign a contract to get the original files: http://forscenter.ch/en/our-surveys/swiss-household-panel/ 

# START AROUND LINE 85 WHEN USING ANONYMIZED REPLICATION DATASET.


# SET WORKING DIRECTORY
  path <- "C:/Users/pbauer/Google Drive/Research/Replication/Replication_Bauer_ESR_2015"
  setwd(path)


# IMPORT AND MERGE DATA 
  library(foreign)
  shp <- read.dta("shp99_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE) 
  library(plyr)
  shp <- join(shp, read.dta("shp00_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp01_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp02_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp03_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp04_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp05_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp06_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp07_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp08_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp09_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp10_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp11_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  shp <- join(shp, read.dta("shp12_p_user.dta", convert.underscore=TRUE, convert.factors=FALSE), by="idpers", type="full")
  nrow(shp)
  detach("package:plyr")
  write.table(shp, "datacomplete.csv", sep=",", row.names=FALSE)
  rm(list=ls())


# USE MERGED DATASET
  datacomplete <- read.csv("datacomplete.csv", sep=",")
  # IDENTIFY ALL NECESSARY ACROSS THE DIFFERENT YEARS BY FIRST LETTERS
  vars <- c("idhous04", "status", "p45", "y02", "l34", "l60", "l61", "l70", "l71", "l80", "l81", "sex", "age", "educat", "wyn", "l101", "l100", "r04","r05", "civsta", "l16", "l01", "n39", "n40" 
  , "n41", "n42", "n43", "n44", "n45", "n46", "l21", "i02", "wstat", "p04", "reg.1")
  used.vars <- NULL
  for (i in vars){
  used.vars <- c(used.vars, names(datacomplete)[grepl(i, names(datacomplete))])
  }
  data <- datacomplete[c(used.vars)]
  rm(datacomplete)
  

  
  # Prepare data for DATAVERSE (as requested by FORS)
    # Replace "idhous04" with random numbers
      length(unique(data$idhous04)) # 5623 unique household IDs
      some.numbers <- rnorm(5623)+4 # Generate 5623 random numbers
      length(some.numbers)
      
    # Add numbers to data
      library(plyr)
      r.numbers <- data.frame(cbind(count(data$idhous04), some.numbers)) # Generate dataframe with house ID and random numbers
      names(r.numbers) <- c("idhous04", "freq", "houseid") # Name variables
      data <- join(data, r.numbers, by="idhous04", type="left")  # Merge
      detach("package:plyr")
      data$houseid[is.na(data$idhous04)] <- NA 
      summary(data$idhous04)
      summary(data$houseid)
      unique(data$idhous04)
      unique(data$houseid)
      cbind(data$idhous04, data$houseid) # Check if that worked
      data <- data[,-1] # delete original household id variable
    
    # Save data for replication
      write.table(data, "data.csv", sep=",", row.names=FALSE)
      rm(list=ls())

  
  




################################################################################
############## START HERE WHEN USING DATAVERSE REPLICATION DATASET #############
################################################################################
# SET WORKING DIRECTORY
  path <- "C:/Users/pbauer/Google Drive/Research/Replication/Replication_Bauer_ESR_2015"
  setwd(path) 
  data <- read.csv("data.csv", sep=",")


# LOAD PACKAGES
  library(Matching)
  library(rgenoud)
  library(stargazer)
  library(stringr)
  library(Hmisc)



################################################################################
############## GENERATE OUTCOME VARIABLE #######################################
################################################################################

# GENERATE OUTCOME VARIABLE TRUST
  data$trust.2002 <- data$p02p45  
  data$trust.2003 <- data$p03p45
  data$trust.2004 <- data$p04p45
  data$trust.2005 <- data$p05p45
  data$trust.2006 <- data$p06p45
  data$trust.2007 <- data$p07p45
  data$trust.2008 <- data$p08p45
  data$trust.2009 <- data$p09p45
  data$trust.2010 <- data$p10p45
  data$trust.2011 <- data$p11p45
  data$trust.2012 <- data$p12p45
  vars <- names(data[c("trust.2002", "trust.2003", "trust.2004", "trust.2005", "trust.2006", "trust.2007", "trust.2008", "trust.2009", "trust.2010", "trust.2011", "trust.2012")])
  for (i in vars){
    data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA 
  }
  # GENERATE CHANGE/DIFFERENCE IN OUTCOME TRUST
  data$trust.02.03 <- data$trust.2003 - data$trust.2002
  data$trust.03.04 <- data$trust.2004 - data$trust.2003
  data$trust.04.05 <- data$trust.2005 - data$trust.2004
  data$trust.05.06 <- data$trust.2006 - data$trust.2005
  data$trust.06.07 <- data$trust.2007 - data$trust.2006
  data$trust.07.08 <- data$trust.2008 - data$trust.2007


# GENERATE OUTCOME VARIABLE TRUST: VALUES 3 TO 7
  data$trust.3.7.2002 <- data$p02p45  
  data$trust.3.7.2003 <- data$p03p45
  data$trust.3.7.2004 <- data$p04p45
  data$trust.3.7.2005 <- data$p05p45
  data$trust.3.7.2006 <- data$p06p45
  data$trust.3.7.2007 <- data$p07p45
  data$trust.3.7.2008 <- data$p08p45
  data$trust.3.7.2009 <- data$p09p45
  data$trust.3.7.2010 <- data$p10p45
  data$trust.3.7.2011 <- data$p11p45
  data$trust.3.7.2012 <- data$p12p45
  vars <- names(data[c("trust.3.7.2002", "trust.3.7.2003", "trust.3.7.2004", "trust.3.7.2005", "trust.3.7.2006", "trust.3.7.2007", "trust.3.7.2008", "trust.3.7.2009", "trust.3.7.2010", "trust.3.7.2011", "trust.3.7.2012")])
  for (i in vars){
    data[c(i)][data[c(i)]<=2 | data[c(i)]>=8] <- NA 
  }
  table(data$trust.3.7.2004, data$p04p45)
  table(data$trust.3.7.2004)
  # GENERATE CHANGE/DIFFERENCE IN OUTCOME TRUST: VALUES 3 TO 7
  data$trust.3.7.02.03 <- data$trust.3.7.2003 - data$trust.3.7.2002
  data$trust.3.7.03.04 <- data$trust.3.7.2004 - data$trust.3.7.2003
  data$trust.3.7.04.05 <- data$trust.3.7.2005 - data$trust.3.7.2004
  data$trust.3.7.05.06 <- data$trust.3.7.2006 - data$trust.3.7.2005
  data$trust.3.7.06.07 <- data$trust.3.7.2007 - data$trust.3.7.2006
  data$trust.3.7.07.08 <- data$trust.3.7.2008 - data$trust.3.7.2007


# GENERATE OUTCOME VARIABLE TRUST: VALUES 7 TO 10
  data$trust.7.10.2002 <- data$p02p45  
  data$trust.7.10.2003 <- data$p03p45
  data$trust.7.10.2004 <- data$p04p45
  data$trust.7.10.2005 <- data$p05p45
  data$trust.7.10.2006 <- data$p06p45
  data$trust.7.10.2007 <- data$p07p45
  data$trust.7.10.2008 <- data$p08p45
  data$trust.7.10.2009 <- data$p09p45
  data$trust.7.10.2010 <- data$p10p45
  data$trust.7.10.2011 <- data$p11p45
  data$trust.7.10.2012 <- data$p12p45
  vars <- names(data[c("trust.7.10.2002", "trust.7.10.2003", "trust.7.10.2004", "trust.7.10.2005", "trust.7.10.2006", "trust.7.10.2007", "trust.7.10.2008", "trust.7.10.2009", "trust.7.10.2010", "trust.7.10.2011", "trust.7.10.2012")])
  for (i in vars){
    data[c(i)][data[c(i)]<=6] <- NA 
  }
  table(data$trust.7.10.2004, data$p04p45)
  # GENERATE CHANGE/DIFFERENCE IN OUTCOME TRUST: VALUES 7 TO 10
  data$trust.7.10.02.03 <- data$trust.7.10.2003 - data$trust.7.10.2002
  data$trust.7.10.03.04 <- data$trust.7.10.2004 - data$trust.7.10.2003
  data$trust.7.10.04.05 <- data$trust.7.10.2005 - data$trust.7.10.2004
  data$trust.7.10.05.06 <- data$trust.7.10.2006 - data$trust.7.10.2005
  data$trust.7.10.06.07 <- data$trust.7.10.2007 - data$trust.7.10.2006
  data$trust.7.10.07.08 <- data$trust.7.10.2008 - data$trust.7.10.2007



################################################################################
############## GENERATE TREATMENT VARIABLE #####################################
################################################################################

# GENERATE ATTACK VARIABLE FOR 2003 (1 = ja, 2 = nein -> recoded)
    # IMPORTANT FOR VICTIM VARIABLE
    data$attack.2003 <- data$p03y02  
    vars <- names(data[c("attack.2003")])
    for (i in vars){
      data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
      data[c(i)][data[c(i)]==2] <- 0 
    }


# GENERATE THREAT TREATMENT VARIABLE (Recode 1 = yes, 2 = no -> 0)
  data$threat.2004 <- data$p04l60
  data$threat.2005 <- data$p05l60
  data$threat.2006 <- data$p06l60
  data$threat.2007 <- data$p07l60
  data$threat.2008 <- data$p08l60
  vars <- names(data[c("threat.2004", "threat.2005", "threat.2006", "threat.2007", "threat.2008")])
  for (i in vars){
    data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
    data[c(i)][data[c(i)]==2] <- 0 
  }
  table(data$threat.2004)
# GENERATE SUFFERING TRHEAT TREATMENT VARIABLE
  data$threat.suffering.2004 <- data$p04l61
  data$threat.suffering.2005 <- data$p05l61
  data$threat.suffering.2006 <- data$p06l61
  data$threat.suffering.2007 <- data$p07l61
  data$threat.suffering.2008 <- data$p08l61
  vars <- names(data[c("threat.suffering.2004", "threat.suffering.2005", "threat.suffering.2006", "threat.suffering.2007", "threat.suffering.2008")])
  for (i in vars){
    data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- 0
  }
  table(data$threat.suffering.2005)


# GENERATE INJURY TREATMENT VARIABLE (Recode 1 = yes, 2 = no -> 0)
  data$injury.2004 <- data$p04l70
  data$injury.2005 <- data$p05l70
  data$injury.2006 <- data$p06l70
  data$injury.2007 <- data$p07l70
  data$injury.2008 <- data$p08l70
  vars <- names(data[c("injury.2004", "injury.2005", "injury.2006", "injury.2007", "injury.2008")])
  for (i in vars){
    data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
    data[c(i)][data[c(i)]==2] <- 0 
  }
  table(data$injury.2004)
# GENERATE SUFFERING INJURY TREATMENT VARIABLE
  data$injury.suffering.2004 <- data$p04l71
  data$injury.suffering.2005 <- data$p05l71
  data$injury.suffering.2006 <- data$p06l71
  data$injury.suffering.2007 <- data$p07l71
  data$injury.suffering.2008 <- data$p08l71
  vars <- names(data[c("injury.suffering.2004", "injury.suffering.2005", "injury.suffering.2006", "injury.suffering.2007", "injury.suffering.2008")])
  for (i in vars){
    data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
  }


# GENERATE HARASSMENT TREATMENT VARIABLE (Recode 1 = yes, 2 = no -> 0)
    data$harassment.2004 <- data$p04l80
    data$harassment.2005 <- data$p05l80
    vars <- names(data[c("harassment.2004", "harassment.2005")])
    for (i in vars){
      data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
      data[c(i)][data[c(i)]==2] <- 0 
    }
# GENERATE SUFFERING HARASSMENT TREATMENT VARIABLE
    data$harassment.suffering.2004 <- data$p04l81
    data$harassment.suffering.2005 <- data$p05l81
    vars <- names(data[c("harassment.suffering.2004", "harassment.suffering.2005")])
    for (i in vars){
      data[c(i)][data[c(i)]==-3 | data[c(i)]==-2 | data[c(i)]==-1] <- NA
      data[c(i)][data[c(i)]==2] <- 0 
    }
    

# REPEATED VICTIMIZATION: DUMMY = 1 IF EXPERIENCED ONE OF THE VICTIMIZATIONS IN 
  # THE RESPECTIVE WAVE
  data$victim.2003 <- 0
  data$victim.2003[data$attack.2003==1] <- 1
  data$victim.2004 <- 0
  data$victim.2004[data$threat.2004==1 | data$injury.2004==1 | data$harassment.2004==1] <- 1
  data$victim.2005 <- 0
  data$victim.2005[data$threat.2005==1 | data$injury.2005==1 | data$harassment.2005==1] <- 1
  data$victim.2006 <- 0
  data$victim.2006[data$threat.2006==1 | data$injury.2006==1] <- 1
  data$victim.2007 <- 0
  data$victim.2007[data$threat.2007==1 | data$injury.2007==1] <- 1
  data$victim.2008 <- 0
  data$victim.2008[data$threat.2008==1 | data$injury.2008==1] <- 1
  data$rep.threat.count <- rowSums(data[c("victim.2004", "victim.2005" , "victim.2006", "victim.2007", "victim.2008")], na.rm=TRUE)
  table(data$rep.threat.count) # Show number of people repeated victimization across years
  # DUMMIES FOR TRHEAT VICTIMIZATION IN ONLY THAT YEAR
  vars <- c("threat.2004","threat.2005","threat.2006","threat.2007","threat.2008")
  for (i in vars){
    name <- paste(i, ".only", sep="")
    data[,name] <- data[,i]  
    data[,name][data$rep.threat.count>=2] <- 0  
  }
  table(data$victim.2003)
  table(data$victim.2004)
  table(data$victim.2005)
  table(data$victim.2006)
  table(data$victim.2007)
  table(data$victim.2008)

# SEVERAL VICTIMIZATIONS THE SAME YEAR: 2004-2008
  vars <- c(".2004",".2005",".2006",".2007",".2008")
  for (i in vars){
    threat <- paste("threat", i, sep="")
    injury <- paste("injury", i, sep="")    
    harassment <- paste("harassment", i, sep="")   
    victim.several <- paste("victim.several", i, sep="")
    data[,victim.several] <- 0
    if (i==".2004" | i==".2005"){data[,victim.several][data[,threat] + data[,injury] + data[,harassment]>=2] <- 1}
    if (i==".2006" | i==".2007" | i==".2008"){data[,victim.several][data[,threat] + data[,injury]>=2] <- 1}    
  }
  table(data$victim.several.2004)
  table(data$victim.several.2005)
  table(data$victim.several.2006)
  table(data$victim.several.2007)
  table(data$victim.several.2008)



# CUMMULATIVE VICTIMIZATION N ACROSS WAVES (in the years before the treatment year)
  data$cumulative.previous.victimization.2004 <- data$victim.2003
  data$cumulative.previous.victimization.2005 <- data$victim.2003 + data$victim.2004
  data$cumulative.previous.victimization.2006 <- data$victim.2003 + data$victim.2004 + data$victim.2005
  data$cumulative.previous.victimization.2007 <- data$victim.2003 + data$victim.2004 + data$victim.2005 + data$victim.2006
  data$cumulative.previous.victimization.2008 <- data$victim.2003 + data$victim.2004 + data$victim.2005 + data$victim.2006 + data$victim.2007
  table(data$cumulative.previous.victimization.2005)
  table(data$cumulative.previous.victimization.2006)
  table(data$cumulative.previous.victimization.2007)
  table(data$cumulative.previous.victimization.2008)


# GENERATE NEW TREATMENT VARIABLES FOR STRONG SUFFERING
  threat <- c("threat.2004", "threat.2005", "threat.2006", "threat.2007", "threat.2008") # all treatments
  threat.suffering <- c("threat.suffering.2004", "threat.suffering.2005", "threat.suffering.2006", "threat.suffering.2007", "threat.suffering.2008")
  for (i in 1:5){
    intense.threat <- paste("intense.", threat[i], sep="")
    data[,intense.threat] <- NA
    data[,intense.threat][data[,threat[i]]==0] <- 0
    data[,intense.threat][data[,threat[i]]==1 & data[,threat.suffering[i]]>=7] <- 1     
  } # >7 I STHE LEVEL OF SUFFERING
  
  table(data$intense.threat.2005, data$threat.suffering.2005)        
  table(data$intense.threat.2005, data$threat.2005)  



################################################################################
############## GENERATE COVARIATES/CONTROLS ####################################
################################################################################
# GENDER
  data$male <- data$sex04
  data$male[data$male==2] <- 0
  table(data$male, data$sex04)


# AGE
  data$age <- data$age04
  data$age[data$age==-3 | data$age==-2 | data$age==-1] <- NA
  table(data$age, data$age04) # age is used in the analysis 



# EDUCATION
  years <- c("03", "04", "05", "06", "07", "08")
  for (i in years){
    y <- paste("education.20", i, sep="") 
    z <- paste("educat", i, sep="") 
    data[c(y)] <- data[z]
    data[c(y)][data[c(y)]<=-1] <- NA
  }
  table(data$education.2003, data$educat03)


# UNEMPLOYMENT
  vars <- names(data[c("wstat02", "wstat03", "wstat04", "wstat05", "wstat06", "wstat07", "wstat08", "wstat09", "wstat10", "wstat11", "wstat12")])
  for (i in vars){
    data[c(i)][data[c(i)]<=0] <- NA 
    #   data[c(i)][data[c(i)]==3] <- 0   
    #   data[c(i)][data[c(i)]==1] <- 0 
    #   data[c(i)][data[c(i)]==2] <- 1  
  }
  data$unemployed.2002 <- data$wstat02 
  data$unemployed.2003 <- data$wstat03
  data$unemployed.2004 <- data$wstat04
  data$unemployed.2005 <- data$wstat05
  data$unemployed.2006 <- data$wstat06
  data$unemployed.2007 <- data$wstat07
  data$unemployed.2008 <- data$wstat08
  data$unemployed.2009 <- data$wstat09
  data$unemployed.2010 <- data$wstat10
  data$unemployed.2011 <- data$wstat11
  data$unemployed.2012 <- data$wstat12
  table(data$unemployed.2004, data$wstat04)
  vars <- names(data[c("unemployed.2002", "unemployed.2003", "unemployed.2004", "unemployed.2005", "unemployed.2006", "unemployed.2007", "unemployed.2008", "unemployed.2009", "unemployed.2010", "unemployed.2011", "unemployed.2012")])
  for (i in vars){
    data[c(i)][data[c(i)]==3] <- 0   
    data[c(i)][data[c(i)]==1] <- 0 
    data[c(i)][data[c(i)]==2] <- 1  # 1 if unemployedloyed
  }


# JOBLOSS: 1 if unemployed status changed from active occupied to unemployed
  #       0 for all others
  data$jobloss.03.04 <- 0
  data$jobloss.04.05 <- 0
  data$jobloss.05.06 <- 0
  data$jobloss.06.07 <- 0
  data$jobloss.07.08 <- 0
  data$jobloss.03.04[is.na(data$wstat04) | is.na(data$wstat03)] <- NA
  data$jobloss.04.05[is.na(data$wstat05) | is.na(data$wstat04)] <- NA
  data$jobloss.05.06[is.na(data$wstat06) | is.na(data$wstat05)] <- NA
  data$jobloss.06.07[is.na(data$wstat07) | is.na(data$wstat06)] <- NA
  data$jobloss.07.08[is.na(data$wstat08) | is.na(data$wstat07)] <- NA
  data$jobloss.03.04[data$wstat04==2 & data$wstat03==1] <- 1
  data$jobloss.04.05[data$wstat05==2 & data$wstat04==1] <- 1
  data$jobloss.05.06[data$wstat06==2 & data$wstat05==1] <- 1
  data$jobloss.06.07[data$wstat07==2 & data$wstat06==1] <- 1
  data$jobloss.07.08[data$wstat08==2 & data$wstat07==1] <- 1


# INCOME
  years <- c("03", "04", "05", "06", "07", "08")
  for (i in years){
    y <- paste("income.20", i, sep="") 
    z <- paste("i", i, "wyn", sep="") 
    data[c(y)] <- data[z]
    data[c(y)][data[c(y)]<=-1] <- NA
    data[c(y)][data[c(y)]<=22110] <- 0
    data[c(y)][data[c(y)]>22110 & data[c(y)]<=54310] <- 1
    data[c(y)][data[c(y)]>54310 & data[c(y)]<=73700] <- 2
    data[c(y)][data[c(y)]>73700] <- 3  
  }
  hist(data$income.2003)
  summary(data$i08wyn)
  summary(data$income.2003)


# MEMBER: 1 IF MEMBER IN AT LEAST ONE ASSOCIATION (P04N39-P04N46)
  years <- c("03", "04", "05", "06", "07", "08")
  for (i in years){
    data[c(paste("member.20", i, sep=""))] <- 0
    data[c(paste("member.20", i, sep=""))][data[c(paste("p", i, "n39", sep=""))]==1 |
                                             data[c(paste("p", i, "n40", sep=""))]==1 |
                                             data[c(paste("p", i, "n41", sep=""))]==1 |
                                             data[c(paste("p", i, "n42", sep=""))]==1 |
                                             data[c(paste("p", i, "n43", sep=""))]==1 |
                                             data[c(paste("p", i, "n44", sep=""))]==1 |
                                             data[c(paste("p", i, "n45", sep=""))]==1 |
                                             data[c(paste("p", i, "n46", sep=""))]==1] <- 1
    
  }
  table(data$member.2007)


# MINORITY STATUS (derived from Reg_1_**: "First nationality by region")
  # SWISS/EUROPE/U.S./OCEANIA/ANTARCTICA = 0 VS. AFRICA/LATIN AMERICA/ASIA = 1
  years <- c("03", "04", "05", "06", "07", "08")
  for (i in years){
    y <- paste("minority.20", i, sep="") 
    z <- paste("reg.1.", i, sep="") 
    data[c(y)] <- data[z]
    data[c(y)][data[c(y)]<0] <- NA
    data[c(y)][data[c(y)]<=17] <- 0
    data[c(y)][data[c(y)]==31] <- 0    
    data[c(y)][data[c(y)]==50] <- 0
    data[c(y)][data[c(y)]==60] <- 0      
    data[c(y)][data[c(y)]==20] <- 1 # Africa
    data[c(y)][data[c(y)]==30] <- 1 # Latin America
    data[c(y)][data[c(y)]==40] <- 1 # Asia
  print(table(data[,y], data[,z]))  
  print(table(data[,y]))
  }







# KEEP RELEVANT VARIABLES
  data <- data[,c(1:match("status12",names(data)), c(match("houseid",names(data)):length(names(data))))] # pull out relevant variables
  for (i in 1:length(data)){
    data[,c(i)] <- as.numeric(as.character(data[,c(i)]))
  }


# CHECK DATA TYPES OF RELEVANT VARIABLES
  str(data)
# CHECK CLASS OF RELEVANT VARIABLES
  for (i in 1:length(names(data))){print(class(data[,i]))}


##########################################
############## DESCRIPTIVE STATISTICS#####
##########################################

# TABLE2: SUMMARY TABLE OF VARIABLES
####################################
  library(psych)
  library(xtable)
  # THERE ARE SOME RESPONDENTS AGED 11 WHO HAVE BEEN ASKED THE VICTIMIZATION QUESTIONS
  # THEREFORE SUMMARY STATISTICS ARE CALCULATED FOR ALL ABOVE 10
  # THOSE BELOW 10 DROP OUT IN LATER ANALYSIS BECAUSE THEY DIDN'T ANSWER THE TRUST QUESTIONS
  data2 <- data[data$age>10,] 
  overview <- describe(data2[,c("trust.2003", "trust.2004", "trust.2005", "trust.2006", "trust.2007", "trust.2008", 
                               "trust.03.04" , "trust.04.05" , "trust.05.06", "trust.06.07", "trust.07.08", 
                               "threat.2004", "threat.2005", "threat.2006", "threat.2007", "threat.2008", 
                               "intense.threat.2004", "intense.threat.2005", "intense.threat.2006", "intense.threat.2007", "intense.threat.2008",
                               "injury.2004", "injury.2005", "injury.2006", "injury.2007", "injury.2008", 
                               "harassment.2004", "harassment.2005", 
                               "male", 
                               "age",   
                               "education.2003", "education.2004", "education.2005", "education.2006", "education.2007", 
                               "member.2003", "member.2004", "member.2005", "member.2006", "member.2007", 
                               "income.2003", "income.2004", "income.2005", "income.2006", "income.2007",                                                             
                               "victim.2003", "victim.2004", "victim.2005", "victim.2006", "victim.2007",      
                               "unemployed.2003", "unemployed.2004", "unemployed.2005", "unemployed.2006", "unemployed.2007",    
                               "jobloss.03.04", "jobloss.04.05", "jobloss.05.06", "jobloss.06.07", "jobloss.07.08",
                               "minority.2003", "minority.2004", "minority.2005", "minority.2006", "minority.2007")])            
  detach("package:psych")
  overview <- data.frame(overview)
  overview <- round(overview, 2)
  overview <- overview[,-c(6, 7, 11, 12, 13)]
  rownames(overview) <- capitalize(str_replace_all(rownames(overview), "[.]", " "))
  xtable <- xtable::xtable
  xtable(overview, digits = c(0, 0,0,2,2,0,0,0,0))




#######################################################
# FIGURE 1: TRENDPLOT FOR EXPERIENCES ACROSS THE SAMPLE
#######################################################
  year <- c(2001:2012) 
  # status: N of realized individual interviews
  vars <- c("status01", "status02", "status03", "status04", "status05", "status06", "status07", "status08", "status09", "status10", "status11", "status12")
  status <- NULL
  for(i in vars){
    status <- cbind(status, status2 <- table(data[c(i)][data[c(i)]==0]))
  }
  status <- as.data.frame(status)
  names(status) <- vars

  vars <- c("threat.2004", "threat.2005", "threat.2006", "threat.2007", "threat.2008")
  threat <- NULL
  for(i in vars){
    threat <- cbind(threat, threat2 <- table(data[c(i)]))
  }
  threat <- rbind(threat, threat[1,] + threat[2,]) # summe!
  threat <- as.data.frame(threat)
  names(threat) <- vars
  #
  vars <- c("injury.2004", "injury.2005", "injury.2006", "injury.2007", "injury.2008")
  injury <- NULL
  for(i in vars){
    injury <- cbind(injury, injury2 <- table(data[c(i)]))
  }
  injury <- rbind(injury, injury[1,] + injury[2,]) # summe!
  injury <- as.data.frame(injury)
  injury <- as.data.frame(injury)
  names(injury) <- vars
  #
  vars <- c("harassment.2004", "harassment.2005")
  harassment <- NULL
  for(i in vars){
    harassment <- cbind(harassment, harassment2 <- table(data[c(i)]))
  }
  harassment <- rbind(harassment, harassment[1,] + harassment[2,]) # summe!
  harassment <- as.data.frame(harassment)
  harassment <- as.data.frame(harassment)
  names(harassment) <- vars  
  # PLOT
  windows(7,5)
  par(mar = c(4,4,1,1))
  xrange <- c(2003,2009)
  yrange <- c(0,8250) 
  plot(xrange, yrange, type="n", xlab="Year", ylab="Sample and absolute number of victims", 
       xlim=xrange, xaxt='n', yaxt="n")   
  axis(1, at=year,labels=year, cex.axis=.8)
  axis(2, at=seq(0,8500,1000),labels=seq(0,8500,1000), cex.axis=.8)  
  #
  for (i in 1:12){points(year[i], status[1,i], type="b", lwd=1.5, lty=1, col="black", pch=20)}
  # for (i in 2:12){points(year[i], trust[1,i-1], type="b", lwd=1.5, lty=1, col="gray", pch=20)}# almost equal to status  
  for (i in 4:8){points(year[i], threat[2,i-3], type="b", lwd=1.5, lty=1, col="black", pch=20)} # colors[x]
  for (i in 4:8){points(year[i], injury[2,i-3], type="b", lwd=1.5, lty=1, col="black", pch=20)} # colors[x]
  for (i in 4:5){points(year[i], harassment[2,i-3], type="b", lwd=1.5, lty=1, col="black", pch=20)} # colors[x]  
  # POLYGONE
  polygon(c(2001, year[1:12], year[12:1]), c(0, as.numeric(status),rep(0,12)),col="#92929222", border=0) # polygon for sample
  polygon(c(2003, 2003, year[4:8], year[8:4], 2003), c(0, 772, as.numeric(threat[2,][1:5]),0,0,0,0,0,0),col="#FF5C5C22", border=0) # poligon for threat victims
  polygon(c(2003, 2003, year[4:8], year[8:4], 2003), c(0, 112, as.numeric(injury[2,][1:5]),0,0,0,0,0,0),col="#FF000022", border=0) # polygon for injury victims  
  polygon(c(2003, 2003, year[4:5], year[5:4], 2003), c(0, 43, as.numeric(harassment[2,][1:2]),0,0,0),col="red", border=0) # polygon for injury victims  
  abline(v=c(2001:2012), col="gray", lty=3)
  # LABELING
  text(year[1:12], as.numeric(status), as.character(status), cex=.6, pos=1, offset=.3)
  text(year[4:8], as.numeric(threat[2,][1:5]), as.character(threat[2,][1:5]), cex=.6, pos=3, offset=.3)
  text(year[4:5], as.numeric(harassment[2,][1:2]), as.character(harassment[2,][1:2]), cex=.6, pos=1, offset=.3)
  text(year[4:8], as.numeric(injury[2,][1:5]), as.character(injury[2,][1:5]), cex=.6, pos=3, offset=.3)
  # LEGEND
  legend(2003.25,4005,
         # , inset = c(0,0.4), 
         cex = .7, 
         # , bty = "n",
         bg = "white",
         legend = c("Full Sample",
                    "Have you been insulted or threatened verbally since (month, year)? Yes.", 
                    "Have you been hit or injured since (month, year)? Yes.",
                    "Have you been sexually harassed or forced to perform sexual acts since (month, year)? Yes."), 
         text.col = c("black", "black"),
         col = c("#92929222", "#FF5C5C22", "#FF000022", "red"), 
         pt.bg = c("#92929222", "#FF5C5C22", "#FF000022", "red")
         , pch = c(22,22,22,22))
  savePlot("figure1", type = "pdf")
  dev.off()
  






################################################################################
############## START ANALYIS ###################################################
################################################################################


# FIGURE 2/TABLE 3: NAIVE ESTIMATES
###################################
  
    # DEFINE LOOP VARIABLES
      outcomes <- paste("trust.", c(rep(2004,3), rep(2005,3), rep(2006,2), 
                                    rep(2007,2), rep(2008,2)), sep="")
      treatments <- c("threat.2004", "injury.2004", "harassment.2004", 
                      "threat.2005", "injury.2005", "harassment.2005", 
                      "threat.2006", "injury.2006", 
                      "threat.2007", "injury.2007", 
                      "threat.2008", "injury.2008")
      treatments.std <- c("threat.2004.std", "injury.2004.std", 
                          "harassment.2004.std", "threat.2005.std", 
                          "injury.2005.std", "harassment.2005.std", 
                          "threat.2006.std", "injury.2006.std", 
                          "threat.2007.std", "injury.2007.std", 
                          "threat.2008.std", "injury.2008.std")
    # DEFINE OUTPUT OBJECTS
      outputs <- NULL
      summary.outputs <- NULL
      outputs.std <- NULL
      summary.outputs.std <- NULL
    # START LOOP
      for (i in 1:12){ # loop over treatments
            # victim.several.i <- paste("victim.several.200", t1, sep="")  # TURN ON TO DELETE MULTI-VICTIMS
            # dataset <- data[data[,victim.several]==0,vars]        # TURN ON TO DELETE MULTI-VICTIMS  
        outcome <- outcomes[i]
        treatment <- treatments[i]
        treatment.std <- treatments.std[i]
        print(outcome); print(treatment);print(treatment.std)
        outputs[[i]] <- lm(paste(outcome, " ~ ", treatment, sep=""), data=data, na.action=na.omit)
        summary.outputs[[i]] <- summary(outputs[[i]])
      }
    # TABLES FOR ESTIMATION OUTPUTS
      stargazer(outputs[[1]], outputs[[2]], outputs[[3]], outputs[[4]], outputs[[5]], outputs[[6]],
                outputs[[7]], outputs[[8]], outputs[[9]], outputs[[10]], outputs[[11]], outputs[[12]], 
                column.labels = capitalize(str_replace_all(outcomes, "[.]", " ")),
                type="latex", title="Regression Results", dep.var.labels=NULL, 
                covariate.labels=capitalize(str_replace_all(treatments, "[.]", " ")), omit.stat=c("LL","ser","f"), ci=FALSE, digits=2, ci.level=0.95, single.row=FALSE)    
    # ESTIMATES FOR PLOT
      estimates <- NULL
      for (i in 1:12){
        estimate.i <- c(outputs[[i]]$coef[2], confint(outputs[[i]])[2,1],confint(outputs[[i]])[2,2])
        estimates <- rbind(estimates, estimate.i)
      }
      estimates <- data.frame(estimates)
      estimates <- cbind(estimates, as.numeric(str_replace_all(outcomes, "trust.", "")), treatments)
      estimates$treatments <- as.character(estimates$treatments)
      names(estimates) <- c("coef", "lowerci", "upperci", "year", "treatment") 
    # GRAPH ESTIMATES
      windows(6,4)
      par(mar=c(3, 4, 1, 0), oma=c(0, 0, 0, 0))
      # points and segments for attack and threat
      plot(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 1], pch=16, axes = F, xlab = "", ylab = "", cex = .8, ylim=c(-2,2), xlim = c(2003.8,2008.5), xaxs = "r", main = "") # plot coefficients
      segments(2003, -1, 2008.2, -1, lwd =  1, lty=3, col="gray") # 0 line
      legend(2008,-2, cex = .7, bg = "white",
             legend = c("Insult/Threat", "Hit/Injury", "Harassment"),
             text.col = c("black", "black", "black"),
             col = c("black", "black", "black"), 
             pt.bg = c("white", "white", "white")
             , pch = c(16,24,1), yjust=0, xjust=1)

      segments(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 2], estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 3], lwd =  1) # plot confidence bars
      axis(1, at = seq(2004,2008, by=1), tick = T,cex.axis = .8) # x-axis
      axis(2, at = seq(-2,2, by=1), label = seq(-2,2, by=1), las = 1, tick = T, mgp = c(2,.6,0), cex.axis = .8)
      segments(2003, 0, 2008.2, 0, lwd =  1, lty=2, col="gray") # 0 line
      segments(2003, 1, 2008.2, 1, lwd =  1, lty=3, col="gray") 
      mtext("Generalized trust (Y)", side=2, line=2, cex=.8)
      # points and segments for injury
      points(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 1], pch = 24)
      segments(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 2], estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 3], lwd =  1) # plot confidence bars
      # points and segments for harassment
      points(estimates[grep("harassment", estimates$treatment), 4]+0.15, estimates[grep("harassment", estimates$treatment), 1], pch = 1)
      segments(estimates[grep("harassment", estimates$treatment), 4]+0.15, estimates[grep("harassment", estimates$treatment), 2], estimates[grep("harassment", estimates$treatment), 4]+0.15, estimates[grep("harassment", estimates$treatment), 3], lwd =  1) # plot confidence bars
      # Add number of observations to plot
      for (i in c(1,4,7,9,11)){text(estimates$year[i]-0.15, .3, paste("M", i, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[,treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[,treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(2,5,8,10,12)){text(estimates$year[i], .3, paste("M", i, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[,treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[,treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(3,6)){text(estimates$year[i]+0.15, .3, paste("M", i, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[,treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[,treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      savePlot(filename = "figure2.pdf", type = "pdf")
      dev.off()









# FIGURE 3/TABLE 4: CHANGE SCORE ANALYSIS
#########################################
      
    # DEFINE LOOP VARIABLES
      years <- c("4", "4", "5", "5", "6", "6", "7", "7", "8", "8") # years to insert into variables names
      outcomes <- c("trust.03.04", "trust.03.04", "trust.04.05", "trust.04.05", "trust.05.06", "trust.05.06", "trust.06.07", "trust.06.07", "trust.07.08", "trust.07.08")
      treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", 
                      "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
    # DEFINE OUTPUT OBJECTS
      outputs <- NULL
      for (i in 1:10){ # check with i <- 1
    # DEFINE OUTCOME AND TREATMENT VARIABLE  
      outcome <- outcomes[i]; treatment <- treatments[i]
      print(outcome); print(treatment)
      outputs[[i]] <- lm(paste(outcome, " ~ ", treatment, sep=""), data=data, na.action=na.omit)
      }
      # TABLES FOR ESTIMATION OUTPUTS
      stargazer(outputs[[1]], outputs[[2]], outputs[[3]], outputs[[4]], outputs[[5]], outputs[[6]],
                outputs[[7]], outputs[[8]], outputs[[9]], outputs[[10]],  
                column.labels = capitalize(str_replace_all(outcomes, "[.]", " ")),
                type="latex", title="Regression Results", dep.var.labels=NULL, 
                covariate.labels=capitalize(str_replace_all(treatments, "[.]", " ")), omit.stat=c("LL","ser","f"), ci=FALSE, digits=2, ci.level=0.95, single.row=FALSE)    
    # ESTIMATES FOR PLOT
      estimates <- NULL
      for (i in 1:10){
        estimate.i <- c(outputs[[i]]$coef[2], confint(outputs[[i]])[2,1],confint(outputs[[i]])[2,2])
        estimates <- rbind(estimates, estimate.i)
      }
      estimates <- data.frame(estimates)
      estimates <- cbind(estimates, as.numeric(paste("200", years, sep="")), treatments) # CHANGE THAT SO THAT years Becomes obsolete
      estimates$treatments <- as.character(estimates$treatments)
      names(estimates) <- c("coef", "lowerci", "upperci", "year", "treatment") 
    # GRAPH ESTIMATES
      windows(6,4)
      par(mar=c(3, 4, 1, 0), oma=c(0, 0, 0, 0))
      # points and segments for attack and threat
      plot(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 1], pch=16, axes = F, xlab = "", ylab = "", cex = .8, ylim=c(-2,2), xlim = c(2003.8,2008.5), xaxs = "r", main = "") # plot coefficients
      segments(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 2], estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 3], lwd =  1) # plot confidence bars
      axis(1, at = seq(2004,2008, by=1), tick = T,cex.axis = 0.8) # x-axis
      axis(2, at = seq(-2,2, by=1), label = seq(-2,2, by=1), las = 1, tick = T,mgp = c(2,.6,0), cex.axis = 0.8)
      segments(2003, 0, 2008.2, 0, lwd =  1, lty=2, col="gray") # 0 line
      segments(2003, -1, 2008.2, -1, lwd =  1, lty=3, col="gray") 
      segments(2003, 1, 2008.2, 1, lwd =  1, lty=3, col="gray") 
      mtext(expression(paste("Generalized trust (", Delta,  "Y)", sep="")), side=2, line=2, cex=.8)
      # points and segments for injury
      points(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 1], pch = 24)
      segments(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 2], estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 3], lwd =  1) # plot confidence bars
      # legend
      legend(2008,-2, cex = .7, bg = "white",
             legend = c("Insult/Threat", "Hit/Injury"),
             text.col = c("black", "black"),
             col = c("black", "black"), 
             pt.bg = c("white", "white")
             , pch = c(16,24), yjust=0, xjust=1)
      # TIPP: sum(data[!is.na(data[,"trust.07.08"]),"injury.2008"], na.rm=TRUE)
      for (i in c(1,3,5,7,9)){text(estimates$year[i]-0.15, .35, paste("M", i+12, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(2,4,8)){text(estimates$year[i], .35, paste("M", i+12, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(6,10)){text(estimates$year[i]+0.1, .35, paste("M", i+12, ": ", "N,T,C = ", nobs(outputs[[i]]), ",", sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), ",", nobs(outputs[[i]])-sum(data[!is.na(data[,outcomes[i]]),treatments[i]], na.rm=TRUE), sep=""), srt=90, cex=.6, adj = c(0, NA))}
      savePlot(filename = "figure3.pdf", type = "pdf")
      dev.off()





      
      
      
      

# FIGURE 4/TABLE 5 (MODEL 23-32): CHANGE SCORE ANALYSIS WITH GENETIC MATCHING
#############################################################################
      
    # INCLUDING MINORITY STATUS
    # DEFINE LOOP VARIABLES
    treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
    outcomes <- c("trust.03.04", "trust.03.04", "trust.04.05","trust.04.05", "trust.05.06", "trust.05.06", "trust.06.07", "trust.06.07", "trust.07.08", "trust.07.08")
    male <- "male"
    age <- "age"
    educations <- c("education.2003", "education.2003", "education.2004", "education.2004", "education.2005", "education.2005", "education.2006", "education.2006", "education.2007", "education.2007")
    members <- c("member.2003", "member.2003", "member.2004", "member.2004", "member.2005", "member.2005", "member.2006", "member.2006", "member.2007", "member.2007")
    incomes <- c("income.2003", "income.2003", "income.2004", "income.2004", "income.2005", "income.2005", "income.2006", "income.2006", "income.2007", "income.2007")
    victims <- c("victim.2003", "victim.2003", "victim.2004", "victim.2004", "victim.2005", "victim.2005", "victim.2006", "victim.2006", "victim.2007", "victim.2007")
    unemps <- c("unemployed.2003", "unemployed.2003", "unemployed.2004", "unemployed.2004", "unemployed.2005", "unemployed.2005", "unemployed.2006", "unemployed.2006", "unemployed.2007", "unemployed.2007")
    joblosses <- c("jobloss.03.04", "jobloss.03.04", "jobloss.04.05", "jobloss.04.05", "jobloss.05.06", "jobloss.05.06", "jobloss.06.07", "jobloss.06.07", "jobloss.07.08", "jobloss.07.08")
    minorities <- c("minority.2003", "minority.2003", "minority.2004", "minority.2004", "minority.2005", "minority.2005","minority.2006", "minority.2006", "minority.2007", "minority.2007")                    

  # DEFINE OUTPUT OBJECTS
    mgens <- NULL # list for the Match() output
    balance <- NULL
    balance.stats <- NULL
    outputs <- NULL
    outputs.summary <- NULL
    data.matched.datasets <- NULL
    n.treated <- NULL
    n.matched <- NULL

  # START LOOP
    ptm <- proc.time() # START CLOCK
    for(i in 1:10){
    
  # DEFINE OUTCOME AND TREATMENT VARIABLE  
    outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
  
  # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
    education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
    print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check

  
  # OMIT MISSINGS
    data2 <- data[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
    data2 <- na.omit(data2)
  
  # DEFINE VARS FOR MATCHING
    Y <- data2[,outcome] # define outcome variable for GenMatch
    Tr <- data2[,treatment] # define treatment variable for GenMatch
    X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
  
  # MATCHING
    gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
    mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
    print(summary(mgens[[i]])) 
    balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
    

    # Regression with matched data and covariates
      # BELOW: PULLS UNIQUE TREATED OBS AND MATCHED OBS (CAN BE SEVERAL PER TREATED) OUT OF DATA2
      data.matched <- rbind(data2[unique(gen1$matches[,1]),],data2[gen1$matches[,2],])
      # BELOW: ATTACHES WEIGHTING VECTOR TO "data.matched": 1's FOR UNIQUE TREATED OBS AND OTHER WEIGHTS FOR MATCHES (OFTEN SEVERAL)
      data.matched <- cbind(data.matched, weights=c(rep(1, length(unique(gen1$matches[,1]))), gen1$matches[,3]))
      # BELOW: COUNTS NUMBER OF TREATED AND UNTREATED OBSERVATIONS
      n.treated[[i]] <- length(unique(gen1$matches[,1])) # count u 
      n.matched[[i]] <- length(gen1$matches[,2]) # untreated observations (matching with replaced), are downweighted if there are several matches
    

  # Save datasets for pooling later on
    data.matched.datasets[[i]] <- data.matched
  
  # COMPARE MEAN DIFFERENCES AS A QUICK CHECK: UNWEIGHTED VS. WEIGHTED (should be more similar with weights)
    mean(data.matched$male[data.matched$threat.2004==1])
    mean(data.matched$male[data.matched$threat.2004==0])
    weighted.mean(data.matched$male[data.matched$threat.2004==1], data.matched$weights[data.matched$threat.2004==1])
    weighted.mean(data.matched$male[data.matched$threat.2004==0], data.matched$weights[data.matched$threat.2004==0])  
  
    # test <- data.matched[order(data.matched$weights),]  
    outputs.summary[[i]] <- summary(lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights))
    outputs[[i]] <- lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights)
    
  # GENERATE BALANCE STATS
      n.covariates <- ncol(X)
      x.2 <- NULL
      for (z in 1:n.covariates){
        mean.diff.before <- balance[[i]]$BeforeMatching[[z]]$mean.Tr-balance[[i]]$BeforeMatching[[z]]$mean.Co
        mean.diff.after <-balance[[i]]$AfterMatching[[z]]$mean.Tr-balance[[i]]$AfterMatching[[z]]$mean.Co
        p.value.before <- balance[[i]]$BeforeMatching[[z]]$tt$p.value      
        p.value.after <- balance[[i]]$AfterMatching[[z]]$tt$p.value
        x <- c(mean.diff.before, p.value.before, mean.diff.after, p.value.after)
        x.2 <- rbind.data.frame(x.2, x)
      }
      x.2 <- cbind(x.2, names(data2)[-(1:2)])
      names(x.2) <- c("mean.diff.before", "p.value.before", "mean.diff.after", "p.value.after", "var") 
      balance.stats[[i]] <- x.2
      names(balance.stats)[i] <- capitalize(str_replace_all(outcome, "[.]", " "))
      balance.stats[[i]] <- round(balance.stats[[i]][,1:4], 2)
      balance.stats[[i]]$orig.nobs <- mgens[[i]]$orig.nobs 
      balance.stats[[i]]$orig.treated.nobs  <- mgens[[i]]$orig.treated.nobs
      balance.stats[[i]]$n.matched.observations  <- n.matched[[i]]    
      balance.stats[[i]] <- cbind(capitalize(str_replace_all(names(data2)[-(1:2)], "[.]", " ")), balance.stats[[i]])
      names(balance.stats[[i]])[1] <- "variable"
      rownames(balance.stats[[i]])[1] <- capitalize(str_replace_all(outcome, "[.]", " ")) 
      rownames(balance.stats[[i]])[2] <- capitalize(str_replace_all(treatment, "[.]", " ")) 
      names(balance.stats[[i]]) <- capitalize(str_replace_all(names(balance.stats[[i]]), "[.]", " "))
    }
    proc.time() - ptm # 1223/60=20min
    save(mgens, balance, balance.stats, outputs, outputs.summary, file="Table5_matching_results.RData")     # load("matching2.RData")

  # TABLE 5 FOR ESTIMATION OUTPUTS
    for (i in 1:10){
    names(outputs[[i]]$coefficients)[3:11] <- c("male", "age", "education", "member", 
                                                "income", "victim", "unemployed", "jobloss", "minority")
    names(outputs[[i]]$coefficients)  <- capitalize(str_replace_all(names(outputs[[i]]$coefficients), "[.]", " "))
    }
    capitalize <- Hmisc::capitalize # load function without package
    stargazer(outputs[[1]],  outputs[[2]],  outputs[[3]],  outputs[[4]],  outputs[[5]],
              outputs[[6]], outputs[[7]], outputs[[8]], outputs[[9]], outputs[[10]], 
    column.labels = capitalize(str_replace_all(outcomes, "[.]", " ")),
    type="latex", title="Regression Results", dep.var.labels=NULL, omit.stat=c("LL","ser","f"), ci=FALSE, digits=2, ci.level=0.95, single.row=FALSE)    

  # TABLE A5 FOR BALANCE STATS 
    balance.stats2 <- NULL
    for (i in 1:10){
      balance.stats2 <- rbind(balance.stats2, as.matrix(balance.stats[[i]]))
    } 
    raus <- c("3", "4", "5", "6", "7", "8", "9"); for(i in raus){rownames(balance.stats2)[rownames(balance.stats2)==i] <- "löschen"}
    library(Hmisc)
    latex(balance.stats2, file="")
    detach("package:Hmisc") 

 

    # ESTIMATES FOR PLOT
      estimates <- NULL
      for (i in 1:10){
        estimate.i <- c(outputs[[i]]$coef[2], confint(outputs[[i]])[2,1],confint(outputs[[i]])[2,2])
        estimates <- rbind(estimates, estimate.i)
      }
      estimates <- data.frame(estimates)
      years <- c("2004", "2004", "2005", "2005", "2006", "2006", "2007", "2007", "2008", "2008") # years to insert into variables names
      estimates <- cbind(estimates, as.numeric(years), treatments)
      estimates$treatments <- as.character(estimates$treatments)
      names(estimates) <- c("coef", "lowerci", "upperci", "year", "treatment") 

    
    # GRAPH ESTIMATES
      windows(6,4)
      par(mar=c(3, 4, 1, 0), oma=c(0, 0, 0, 0))
      # points and segments for attack and threat
      plot(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 1], pch=16, axes = F, xlab = "", ylab = "", cex = .8, ylim=c(-2,2), xlim = c(2003.8,2008.5), xaxs = "r", main = "") # plot coefficients
      segments(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 2], estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 3], lwd =  1) # plot confidence bars
      axis(1, at = seq(2004,2008, by=1), tick = T,cex.axis = 0.8) # x-axis
      axis(2, at = seq(-2,2, by=1), label = seq(-2,2, by=1), las = 1, tick = T, mgp = c(2,.6,0), cex.axis = 0.8)
      segments(2003, 0, 2008.2, 0, lwd =  1, lty=2, col="gray") # 0 line
      segments(2003, -1, 2008.2, -1, lwd =  1, lty=3, col="gray") 
      segments(2003, 1, 2008.2, 1, lwd =  1, lty=3, col="gray") 
      mtext(expression(paste("Generalized trust (", Delta,  "Y)", sep="")), side=2, line=2, cex=.8)
      # points and segments for injury
      points(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 1], pch = 24)
      segments(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 2], estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 3], lwd =  1) # plot confidence bars
        legend(2008,2, cex = .7, bg = "white",
             legend = c("Insult/Threat", "Hit/Injury"),
             text.col = c("black", "black"),
             col = c("black", "black"), 
             pt.bg = c("white", "white")
             , pch = c(16,24), yjust=1, xjust=1)
      
      # Add number of observations to plot
      for (i in c(1,3,5,7,9)){text(estimates$year[i]-0.25, -2, paste("M", i+22, ": ","N,T,M =", nobs(outputs[[i]]), ",", n.treated[[i]], ",", n.matched[[i]], sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(2,4,6,8,10)){text(estimates$year[i]+.10, -2, paste("M", i+22, ": ","N,T,M =", nobs(outputs[[i]]), ",", n.treated[[i]], ",", n.matched[[i]], sep=""), srt=90, cex=.6, adj = c(0, NA))}
      savePlot(filename = "figure4.pdf", type = "pdf")
      dev.off()



      
      
      

# POOLING OF THREAT UND INJURY, 5 EACH (p.403)
######################################
      
  # RENAME VARIABLES IN MATCHED DATASETS TO LATER BIND THEM TOGETHER
    yearz <- c(2004, 2005, 2006, 2007, 2008)
    for (i in c(1,3,5,7,9)){
    names(data.matched.datasets[[i]]) <- c("trust.change", "threat", "male", "age", "education", "member", "income", "victim", "unemployed", "jobloss", "minority", "weights")
    year.i <- yearz[i]
    # ADD WAVE VARIABLE
    data.matched.datasets[[i]] <- cbind(data.matched.datasets[[i]], wave=rep(i, nrow(data.matched.datasets[[i]])))
    }
    for (i in c(2,4,6,8,10)){
    names(data.matched.datasets[[i]]) <- c("trust.change", "injury", "male", "age", "education", "member", "income", "victim", "unemployed", "jobloss", "minority", "weights")
    year.i <- yearz[i]
    # ADD WAVE VARIABLE
    data.matched.datasets[[i]] <- cbind(data.matched.datasets[[i]], wave=rep(i, nrow(data.matched.datasets[[i]])))
    }

    
  # RBIND MATCHED DATASETS
    dataset.threat.pooled <- rbind(data.matched.datasets[[1]],
                                   data.matched.datasets[[3]],
                                   data.matched.datasets[[5]],
                                   data.matched.datasets[[7]],
                                   data.matched.datasets[[9]])
    # CREATE WAVE DUMMIES
    dataset.threat.pooled$wave.2004 <- 0
    dataset.threat.pooled$wave.2004[dataset.threat.pooled$wave==1] <- 1
    dataset.threat.pooled$wave.2005 <- 0
    dataset.threat.pooled$wave.2005[dataset.threat.pooled$wave==3] <- 1
    dataset.threat.pooled$wave.2006 <- 0
    dataset.threat.pooled$wave.2006[dataset.threat.pooled$wave==5] <- 1
    dataset.threat.pooled$wave.2007 <- 0
    dataset.threat.pooled$wave.2007[dataset.threat.pooled$wave==7] <- 1
    dataset.threat.pooled$wave.2008 <- 0
    dataset.threat.pooled$wave.2008[dataset.threat.pooled$wave==9] <- 1
    dataset.injury.pooled <- rbind(data.matched.datasets[[2]],
                                   data.matched.datasets[[4]],
                                   data.matched.datasets[[6]],
                                   data.matched.datasets[[8]],
                                   data.matched.datasets[[10]])
    # CREATE WAVE DUMMIES
    dataset.injury.pooled$wave.2004 <- 0
    dataset.injury.pooled$wave.2004[dataset.injury.pooled$wave==2] <- 1
    dataset.injury.pooled$wave.2005 <- 0
    dataset.injury.pooled$wave.2005[dataset.injury.pooled$wave==4] <- 1
    dataset.injury.pooled$wave.2006 <- 0
    dataset.injury.pooled$wave.2006[dataset.injury.pooled$wave==6] <- 1
    dataset.injury.pooled$wave.2007 <- 0
    dataset.injury.pooled$wave.2007[dataset.injury.pooled$wave==8] <- 1
    dataset.injury.pooled$wave.2008 <- 0
    dataset.injury.pooled$wave.2008[dataset.injury.pooled$wave==10] <- 1

  # ESTIMATED POOLED EFFECTS
    pooled.output.threat <- lm(trust.change ~ threat + male + age + education + member + income + victim + unemployed + jobloss + minority, data=dataset.threat.pooled, weights=weights)
    pooled.output.injury <- lm(trust.change ~ injury + male + age + education + member + income + victim + unemployed + jobloss + minority, data=dataset.injury.pooled, weights=weights)
    summary(pooled.output.threat)
    summary(pooled.output.injury)
    # Number of observations
    nrow(is.na(dataset.threat.pooled)) # is.na not necessary since no missings
    nrow(is.na(dataset.injury.pooled))
       
    # WITH WAVE DUMMIES       
    pooled.output.threat <- lm(trust.change ~ threat + male + age + education + 
                                 member + income + victim + unemployed + 
                                 jobloss + minority +
                                 wave.2004 + wave.2005 + wave.2006 + wave.2007 +
                                 wave.2008, 
                               data=dataset.threat.pooled, weights=weights)
    pooled.output.injury <- lm(trust.change ~ injury + male + age + education + 
                                 member + income + victim + unemployed + 
                                 jobloss + minority +
                                 wave.2004 + wave.2005 + wave.2006 + wave.2007 +
                                 wave.2008, 
                               data=dataset.injury.pooled, weights=weights)
    summary(pooled.output.threat)
    summary(pooled.output.injury)
    # Number of observations
    nrow(is.na(dataset.threat.pooled)) # is.na not necessary since no missings
    nrow(is.na(dataset.injury.pooled))
    # MAKES NO DIFFERENCE

















###################################################
############## ROBUSTNESS CHECKS ##################
###################################################


# FIGURE 5/TABLE 7: VICTIMIZATION OF HIGH SUFFERING
###################################################
    
    # DEFINE LOOP VARIABLES
      treatments <- c("intense.threat.2004", "intense.threat.2005", "intense.threat.2006", "intense.threat.2007","intense.threat.2008") # all treatments
      outcomes <- c("trust.03.04", "trust.04.05", "trust.05.06", "trust.06.07", "trust.07.08")
      male <- "male"
      age <- "age"
      educations <- c("education.2003", "education.2004", "education.2005", "education.2006", "education.2007")
      members <- c("member.2003", "member.2004", "member.2005", "member.2006", "member.2007")
      incomes <- c("income.2003", "income.2004", "income.2005", "income.2006", "income.2007")
      victims <- c("victim.2003", "victim.2004", "victim.2005", "victim.2006", "victim.2007")
      unemps <- c("unemployed.2003", "unemployed.2004", "unemployed.2005", "unemployed.2006", "unemployed.2007")
      joblosses <- c("jobloss.03.04", "jobloss.04.05", "jobloss.05.06", "jobloss.06.07","jobloss.07.08")
      minorities <- c("minority.2003", "minority.2004", "minority.2005","minority.2006", "minority.2007")                    

    # DEFINE OUTPUT OBJECTS
      mgens <- NULL # list for the Match() output
      balance <- NULL
      balance.stats <- NULL
      outputs <- NULL
      outputs.summary <- NULL
      # NEW STUFF
      n.treated <- NULL
      n.matched <- NULL
    
    # START LOOP
      ptm <- proc.time() # START CLOCK
      for(i in 1:5){
      
    # DEFINE OUTCOME AND TREATMENT VARIABLE  
      outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
      
    # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
      education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
      print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check
      
      
    # OMIT MISSINGS
      data2 <- data[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
      data2 <- na.omit(data2)
      
    # DEFINE VARS FOR MATCHING
      Y <- data2[,outcome] # define outcome variable for GenMatch
      Tr <- data2[,treatment] # define treatment variable for GenMatch
      X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
      
  # MATCHING
    gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
    mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
    print(summary(mgens[[i]])) 
    balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
    

    # Regression with matched data and covariates
      # BELOW: PULLS UNIQUE TREATED OBS AND MATCHED OBS (CAN BE SEVERAL PER TREATED) OUT OF DATA2
      data.matched <- rbind(data2[unique(gen1$matches[,1]),],data2[gen1$matches[,2],])
      # BELOW: ATTACHES WEIGHTING VECTOR TO "data.matched": 1's FOR UNIQUE TREATED OBS AND OTHER WEIGHTS FOR MATCHES (OFTEN SEVERAL)
      data.matched <- cbind(data.matched, weights=c(rep(1, length(unique(gen1$matches[,1]))), gen1$matches[,3]))
      # BELOW: COUNTS NUMBER OF TREATED AND UNTREATED OBSERVATIONS
      n.treated[[i]] <- length(unique(gen1$matches[,1])) # count u 
      n.matched[[i]] <- length(gen1$matches[,2]) # untreated observations (matching with replaced), are downweighted if there are several matches
  
    # test <- data.matched[order(data.matched$weights),]  
    outputs.summary[[i]] <- summary(lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights))
    outputs[[i]] <- lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights)
    
  # GENERATE BALANCE STATS
      n.covariates <- ncol(X)
      x.2 <- NULL
      for (z in 1:n.covariates){
        mean.diff.before <- balance[[i]]$BeforeMatching[[z]]$mean.Tr-balance[[i]]$BeforeMatching[[z]]$mean.Co
        mean.diff.after <-balance[[i]]$AfterMatching[[z]]$mean.Tr-balance[[i]]$AfterMatching[[z]]$mean.Co
        p.value.before <- balance[[i]]$BeforeMatching[[z]]$tt$p.value      
        p.value.after <- balance[[i]]$AfterMatching[[z]]$tt$p.value
        x <- c(mean.diff.before, p.value.before, mean.diff.after, p.value.after)
        x.2 <- rbind.data.frame(x.2, x)
      }
      x.2 <- cbind(x.2, names(data2)[-(1:2)])
      names(x.2) <- c("mean.diff.before", "p.value.before", "mean.diff.after", "p.value.after", "var") 
      balance.stats[[i]] <- x.2
      names(balance.stats)[i] <- capitalize(str_replace_all(outcome, "[.]", " "))
      balance.stats[[i]] <- round(balance.stats[[i]][,1:4], 2)
      balance.stats[[i]]$orig.nobs <- mgens[[i]]$orig.nobs 
      balance.stats[[i]]$orig.treated.nobs  <- mgens[[i]]$orig.treated.nobs
      balance.stats[[i]]$n.matched.observations  <- n.matched[[i]]    
      balance.stats[[i]] <- cbind(capitalize(str_replace_all(names(data2)[-(1:2)], "[.]", " ")), balance.stats[[i]])
      names(balance.stats[[i]])[1] <- "variable"
      rownames(balance.stats[[i]])[1] <- capitalize(str_replace_all(outcome, "[.]", " ")) 
      rownames(balance.stats[[i]])[2] <- capitalize(str_replace_all(treatment, "[.]", " ")) 
      names(balance.stats[[i]]) <- capitalize(str_replace_all(names(balance.stats[[i]]), "[.]", " "))
    }
      proc.time() - ptm # 879/60=14min
      save(mgens, balance, balance.stats, outputs, outputs.summary, file="Table7_matching_results.RData")     # load("matching2.RData")
    
    # TABLES FOR ESTIMATION OUTPUTS
      for (i in 1:5){
            names(outputs[[i]]$coefficients)[3:11] <- c("male", "age", "education", "member", 
                                                "income", "victim", "unemployed", "jobloss", "minority")
      names(outputs[[i]]$coefficients)  <- capitalize(str_replace_all(names(outputs[[i]]$coefficients), "[.]", " "))
      }
      capitalize <- Hmisc::capitalize # load function without package
      stargazer(outputs[[1]],  outputs[[2]],  outputs[[3]],  outputs[[4]],  outputs[[5]],
      column.labels = capitalize(str_replace_all(outcomes, "[.]", " ")),
      type="latex", title="Regression Results", dep.var.labels=NULL, omit.stat=c("LL","ser","f"), ci=FALSE, digits=2, ci.level=0.95, single.row=FALSE)    


    # TABLE FOR BALANCE STATS
      balance.stats2 <- NULL
      for (i in 1:5){
        balance.stats2 <- rbind(balance.stats2, as.matrix(balance.stats[[i]]))
      } 
     raus <- c("3", "4", "5", "6", "7", "8", "9"); for(i in raus){rownames(balance.stats2)[rownames(balance.stats2)==i] <- "löschen"}
      library(Hmisc)
      latex(balance.stats2, file="")
      detach("package:Hmisc") 
    
    
    # ESTIMATES FOR PLOT
      estimates <- NULL
      for (i in 1:5){
        estimate.i <- c(outputs[[i]]$coef[2], confint(outputs[[i]])[2,1],confint(outputs[[i]])[2,2])
        estimates <- rbind(estimates, estimate.i)
      }
      estimates <- data.frame(estimates)
      years <- c("2004", "2005", "2006", "2007","2008") # years to insert into variables names
      estimates <- cbind(estimates, as.numeric(years), treatments)
      estimates$treatments <- as.character(estimates$treatments)
      names(estimates) <- c("coef", "lowerci", "upperci", "year", "treatment") 
      
    # GRAPH ESTIMATES
      windows(5,3)
      par(mar=c(3, 4, 1, 0), oma=c(0, 0, 0, 0))
      # points and segments for attack and threat
      plot(estimates[grep("threat", estimates$treatment), 4], estimates[grep("threat", estimates$treatment), 1], pch=1, axes = F, xlab = "", ylab = "", cex = .8, ylim=c(-2,2), xlim = c(2003.8,2008.5), xaxs = "r", main = "") # plot coefficients
      segments(estimates[grep("threat", estimates$treatment), 4], estimates[grep("threat", estimates$treatment), 2], estimates[grep("threat", estimates$treatment), 4], estimates[grep("threat", estimates$treatment), 3], lwd =  1) # plot confidence bars
      axis(1, at = seq(2004,2008, by=1), tick = T,cex.axis = 0.8) # x-axis
      axis(2, at = seq(-2,2, by=1), label = seq(-2,2, by=1), las = 1, tick = T, ,mgp = c(2,.6,0), cex.axis = 0.8)
      segments(2003, 0, 2008.2, 0, lwd =  1, lty=2, col="gray") # 0 line
      segments(2003, -1, 2008.2, -1, lwd =  1, lty=3, col="gray") 
      segments(2003, 1, 2008.2, 1, lwd =  1, lty=3, col="gray") 
      mtext(expression(paste("Generalized trust (", Delta,  "Y)", sep="")), side=2, line=2, cex=.8)
      legend(2008,2, cex = .7, bg = "white",
             legend = c("Intense Threat"),
             text.col = c("black"),
             col = c("black"), 
             pt.bg = c("white")
             , pch = c(1), yjust=1, xjust=1)
      
      # Add number of observations to plot
      # NEW STUFF BELOW
      for (i in c(1,2,3,4,5)){text(estimates$year[i]-0.2, -2, paste("M", i+32, ": ","N,T,M =", nobs(outputs[[i]]), ",", n.treated[[i]], ",", n.matched[[i]], sep=""), srt=90, cex=.6, adj = c(0, NA))}
      savePlot(filename = "figure5.pdf", type = "pdf")
      dev.off()
















# HOUSEHOLD DEPENDENCY (PAGE 403-404)
#####################################

  # CREATE DATASET WITHOUT NON-VICTIMS THAT BELONG TO HOUSEHOLDS THAT INCLUDE A VICTIM
      n.victims.households <- aggregate(data[,c("threat.2004", "threat.2005", "threat.2006", "threat.2007", "threat.2008")], by=list(data$houseid), FUN=sum)
      # 
      table(n.victims.households$threat.2004) # Check how many households have more than one victim
      table(n.victims.households$threat.2005)
      table(n.victims.households$threat.2006)
      table(n.victims.households$threat.2007)
      table(n.victims.households$threat.2008)
      sum(table(n.victims.households$threat.2008))
      # Condition: If at least 1 household member is a victim (of threat) then delete all other untreated observations 
      # from dataset that belong to this household (i.e. don't use them to built control group) 
      # Aim: Control groups that are generated from individuals do not live together with victims
      data.new <- data
      treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", 
                      "threat.2007", "injury.2007", "threat.2008", "injury.2008")
      for (z in treatments){
      for (i in unique(data.new$houseid)){
      if(sum(data.new[,z][data.new$houseid==i], na.rm=TRUE)>=1){"Yes"; data.new[,z][data.new$houseid==i & data.new[,z]!=1] <- NA}
      }
      }
      table(data$threat.2004)
      table(data.new$threat.2004)
      table(data$injury.2004)
      table(data.new$injury.2004)# N of untreated decreases!

    # LOOPING VARIABLES
      treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
      outcomes <- c("trust.03.04", "trust.03.04", "trust.04.05","trust.04.05", "trust.05.06", "trust.05.06", "trust.06.07", "trust.06.07", "trust.07.08", "trust.07.08")
      male <- "male"
      age <- "age"
      educations <- c("education.2003", "education.2003", "education.2004", "education.2004", "education.2005", "education.2005", "education.2006", "education.2006", "education.2007", "education.2007")
      members <- c("member.2003", "member.2003", "member.2004", "member.2004", "member.2005", "member.2005", "member.2006", "member.2006", "member.2007", "member.2007")
      incomes <- c("income.2003", "income.2003", "income.2004", "income.2004", "income.2005", "income.2005", "income.2006", "income.2006", "income.2007", "income.2007")
      victims <- c("victim.2003", "victim.2003", "victim.2004", "victim.2004", "victim.2005", "victim.2005", "victim.2006", "victim.2006", "victim.2007", "victim.2007")
      unemps <- c("unemployed.2003", "unemployed.2003", "unemployed.2004", "unemployed.2004", "unemployed.2005", "unemployed.2005", "unemployed.2006", "unemployed.2006", "unemployed.2007", "unemployed.2007")
      joblosses <- c("jobloss.03.04", "jobloss.03.04", "jobloss.04.05", "jobloss.04.05", "jobloss.05.06", "jobloss.05.06", "jobloss.06.07", "jobloss.06.07", "jobloss.07.08", "jobloss.07.08")
      minorities <- c("minority.2003", "minority.2003", "minority.2004", "minority.2004", "minority.2005", "minority.2005","minority.2006", "minority.2006", "minority.2007", "minority.2007")                    
  
    # DEFINE OUTPUT OBJECTS
      mgens <- NULL # list for the Match() output
      balance <- NULL
      balance.stats <- NULL
      outputs <- NULL
      outputs.summary <- NULL
      data.matched.datasets <- NULL
      n.treated <- NULL
      n.matched <- NULL
  
    # START LOOP
      ptm <- proc.time() # START CLOCK
      for(i in 1:10){
      
    # DEFINE OUTCOME AND TREATMENT VARIABLE  
      outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
    
    # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
      education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
      print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check
  
    
    # OMIT MISSINGS - data.new excludes NON-Victims that have a Victim has a houshold member
      data2 <- data.new[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
      data2 <- na.omit(data2)
    
    # DEFINE VARS FOR MATCHING
      Y <- data2[,outcome] # define outcome variable for GenMatch
      Tr <- data2[,treatment] # define treatment variable for GenMatch
      X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
    
    # MATCHING
      gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
      mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
      print(summary(mgens[[i]])) 
      balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
      
  
      # Regression with matched data and covariates
        # BELOW: PULLS UNIQUE TREATED OBS AND MATCHED OBS (CAN BE SEVERAL PER TREATED) OUT OF DATA2
        data.matched <- rbind(data2[unique(gen1$matches[,1]),],data2[gen1$matches[,2],])
        # BELOW: ATTACHES WEIGHTING VECTOR TO "data.matched": 1's FOR UNIQUE TREATED OBS AND OTHER WEIGHTS FOR MATCHES (OFTEN SEVERAL)
        data.matched <- cbind(data.matched, weights=c(rep(1, length(unique(gen1$matches[,1]))), gen1$matches[,3]))
        # BELOW: COUNTS NUMBER OF TREATED AND UNTREATED OBSERVATIONS
        n.treated[[i]] <- length(unique(gen1$matches[,1])) # count u 
        n.matched[[i]] <- length(gen1$matches[,2])
  
      # test <- data.matched[order(data.matched$weights),]  
      outputs.summary[[i]] <- summary(lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights))
      outputs[[i]] <- lm(paste(outcome, "~", paste(names(data2)[-1],sep="", collapse=" + "), sep=""), data=data.matched, weights=weights)
      
      # GENERATE BALANCE STATS
        n.covariates <- ncol(X)
        x.2 <- NULL
        for (z in 1:n.covariates){
          mean.diff.before <- balance[[i]]$BeforeMatching[[z]]$mean.Tr-balance[[i]]$BeforeMatching[[z]]$mean.Co
          mean.diff.after <-balance[[i]]$AfterMatching[[z]]$mean.Tr-balance[[i]]$AfterMatching[[z]]$mean.Co
          p.value.before <- balance[[i]]$BeforeMatching[[z]]$tt$p.value      
          p.value.after <- balance[[i]]$AfterMatching[[z]]$tt$p.value
          x <- c(mean.diff.before, p.value.before, mean.diff.after, p.value.after)
          x.2 <- rbind.data.frame(x.2, x)
        }
        x.2 <- cbind(x.2, names(data2)[-(1:2)])
        names(x.2) <- c("mean.diff.before", "p.value.before", "mean.diff.after", "p.value.after", "var") 
        balance.stats[[i]] <- x.2
        names(balance.stats)[i] <- capitalize(str_replace_all(outcome, "[.]", " "))
        balance.stats[[i]] <- round(balance.stats[[i]][,1:4], 2)
        balance.stats[[i]]$orig.nobs <- mgens[[i]]$orig.nobs 
        balance.stats[[i]]$orig.treated.nobs  <- mgens[[i]]$orig.treated.nobs
        balance.stats[[i]]$n.matched.observations  <- n.matched[[i]]    
        balance.stats[[i]] <- cbind(capitalize(str_replace_all(names(data2)[-(1:2)], "[.]", " ")), balance.stats[[i]])
        names(balance.stats[[i]])[1] <- "variable"
        rownames(balance.stats[[i]])[1] <- capitalize(str_replace_all(outcome, "[.]", " ")) 
        rownames(balance.stats[[i]])[2] <- capitalize(str_replace_all(treatment, "[.]", " ")) 
        names(balance.stats[[i]]) <- capitalize(str_replace_all(names(balance.stats[[i]]), "[.]", " "))
      }
      proc.time() - ptm # 1223/60=20min
      save(mgens, balance, balance.stats, outputs, outputs.summary, file="Household_dependency_matching.RData")     # load("matching2.RData")

    # ESTIMATES FOR PLOT
      estimates <- NULL
      for (i in 1:10){
        estimate.i <- c(outputs[[i]]$coef[2], confint(outputs[[i]])[2,1],confint(outputs[[i]])[2,2])
        estimates <- rbind(estimates, estimate.i)
      }
      estimates <- data.frame(estimates)
      years <- c("2004", "2004", "2005", "2005", "2006", "2006", "2007", "2007", "2008", "2008") # years to insert into variables names
      estimates <- cbind(estimates, as.numeric(years), treatments)
      estimates$treatments <- as.character(estimates$treatments)
      names(estimates) <- c("coef", "lowerci", "upperci", "year", "treatment") 

    # GRAPH ESTIMATES
      windows(6,4)
      par(mar=c(3, 4, 1, 0), oma=c(0, 0, 0, 0))
      # points and segments for attack and threat
      plot(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 1], pch=16, axes = F, xlab = "", ylab = "", cex = .8, ylim=c(-2,2), xlim = c(2003.8,2008.5), xaxs = "r", main = "") # plot coefficients
      segments(estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 2], estimates[grep("threat", estimates$treatment), 4]-0.15, estimates[grep("threat", estimates$treatment), 3], lwd =  1) # plot confidence bars
      axis(1, at = seq(2004,2008, by=1), tick = T,cex.axis = 0.8) # x-axis
      axis(2, at = seq(-2,2, by=1), label = seq(-2,2, by=1), las = 1, tick = T, ,mgp = c(2,.6,0), cex.axis = 0.8)
      segments(2003, 0, 2008.2, 0, lwd =  1, lty=2, col="gray") # 0 line
      segments(2003, -1, 2008.2, -1, lwd =  1, lty=3, col="gray") 
      segments(2003, 1, 2008.2, 1, lwd =  1, lty=3, col="gray") 
      mtext(expression(paste("Generalized trust (", Delta,  "Y)", sep="")), side=2, line=2, cex=.8)
      # points and segments for injury
      points(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 1], pch = 24)
      segments(estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 2], estimates[grep("injury", estimates$treatment), 4], estimates[grep("injury", estimates$treatment), 3], lwd =  1) # plot confidence bars
        legend(2008,2, cex = .7, bg = "white",
             legend = c("Insult/Threat", "Hit/Injury"),
             text.col = c("black", "black"),
             col = c("black", "black"), 
             pt.bg = c("white", "white")
             , pch = c(16,24), yjust=1, xjust=1)
      
      # Add number of observations to plot
      for (i in c(1,3,5,7,9)){text(estimates$year[i]-0.25, -2, paste("M", i+22, ": ","N,T,M =", nobs(outputs[[i]]), ",", n.treated[[i]], ",", n.matched[[i]], sep=""), srt=90, cex=.6, adj = c(0, NA))}
      for (i in c(2,4,6,8,10)){text(estimates$year[i]+.10, -2, paste("M", i+22, ": ","N,T,M =", nobs(outputs[[i]]), ",", n.treated[[i]], ",", n.matched[[i]], sep=""), srt=90, cex=.6, adj = c(0, NA))}
      savePlot(filename = "Household_dependency_matching.pdf", type = "pdf")
      dev.off()








# QUANTILES OF THE DEPENDENT VARIABLE - 3-7 (PAGE 403-404)
##########################################################
      
  # DEFINE LOOP VARIABLES
    outcomes <- c("trust.3.7.03.04", "trust.3.7.03.04", "trust.3.7.04.05","trust.3.7.04.05", "trust.3.7.05.06", "trust.3.7.05.06", "trust.3.7.06.07", "trust.3.7.06.07", "trust.3.7.07.08", "trust.3.7.07.08")
    treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
    male <- "male"
    age <- "age"
    educations <- c("education.2003", "education.2003", "education.2004", "education.2004", "education.2005", "education.2005", "education.2006", "education.2006", "education.2007", "education.2007")
    members <- c("member.2003", "member.2003", "member.2004", "member.2004", "member.2005", "member.2005", "member.2006", "member.2006", "member.2007", "member.2007")
    incomes <- c("income.2003", "income.2003", "income.2004", "income.2004", "income.2005", "income.2005", "income.2006", "income.2006", "income.2007", "income.2007")
    victims <- c("victim.2003", "victim.2003", "victim.2004", "victim.2004", "victim.2005", "victim.2005", "victim.2006", "victim.2006", "victim.2007", "victim.2007")
    unemps <- c("unemployed.2003", "unemployed.2003", "unemployed.2004", "unemployed.2004", "unemployed.2005", "unemployed.2005", "unemployed.2006", "unemployed.2006", "unemployed.2007", "unemployed.2007")
    joblosses <- c("jobloss.03.04", "jobloss.03.04", "jobloss.04.05", "jobloss.04.05", "jobloss.05.06", "jobloss.05.06", "jobloss.06.07", "jobloss.06.07", "jobloss.07.08", "jobloss.07.08")
    minorities <- c("minority.2003", "minority.2003", "minority.2004", "minority.2004", "minority.2005", "minority.2005","minority.2006", "minority.2006", "minority.2007", "minority.2007")                    

  # DEFINE OUTPUT OBJECTS
    mgens <- NULL # list for the Match() output
    balance <- NULL
    balance.stats <- NULL
    outputs <- NULL
    outputs.summary <- NULL
    data.matched.datasets <- NULL
    n.treated <- NULL
    n.matched <- NULL

  # START LOOP
    ptm <- proc.time() # START CLOCK
    for(i in 1:10){
    
  # DEFINE OUTCOME AND TREATMENT VARIABLE  
    outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
  
  # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
    education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
    print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check

  
  # OMIT MISSINGS
    data2 <- data[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
    data2 <- na.omit(data2)
  
  # DEFINE VARS FOR MATCHING
    Y <- data2[,outcome] # define outcome variable for GenMatch
    Tr <- data2[,treatment] # define treatment variable for GenMatch
    X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
  
  # MATCHING
    gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
    mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
    balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
    }
    proc.time() - ptm # 1223/60=20min
    save(mgens, balance, balance.stats, outputs, outputs.summary, file="matching2_2014_09_26.RData")     # load("matching2.RData")

  # Check the results
    for (i in 1:10){print(summary(mgens[[i]]))}
 

    
    
  
        
 

# QUANTILES OF THE DEPENDENT VARIABLE - 7-10 (PAGE 403-404)
###########################################################
    
  # DEFINE LOOP VARIABLES
    outcomes <- c("trust.7.10.03.04", "trust.7.10.03.04", "trust.7.10.04.05","trust.7.10.04.05", "trust.7.10.05.06", "trust.7.10.05.06", "trust.7.10.06.07", "trust.7.10.06.07", "trust.7.10.07.08", "trust.7.10.07.08")
    treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
    male <- "male"
    age <- "age"
    educations <- c("education.2003", "education.2003", "education.2004", "education.2004", "education.2005", "education.2005", "education.2006", "education.2006", "education.2007", "education.2007")
    members <- c("member.2003", "member.2003", "member.2004", "member.2004", "member.2005", "member.2005", "member.2006", "member.2006", "member.2007", "member.2007")
    incomes <- c("income.2003", "income.2003", "income.2004", "income.2004", "income.2005", "income.2005", "income.2006", "income.2006", "income.2007", "income.2007")
    victims <- c("victim.2003", "victim.2003", "victim.2004", "victim.2004", "victim.2005", "victim.2005", "victim.2006", "victim.2006", "victim.2007", "victim.2007")
    unemps <- c("unemployed.2003", "unemployed.2003", "unemployed.2004", "unemployed.2004", "unemployed.2005", "unemployed.2005", "unemployed.2006", "unemployed.2006", "unemployed.2007", "unemployed.2007")
    joblosses <- c("jobloss.03.04", "jobloss.03.04", "jobloss.04.05", "jobloss.04.05", "jobloss.05.06", "jobloss.05.06", "jobloss.06.07", "jobloss.06.07", "jobloss.07.08", "jobloss.07.08")
    minorities <- c("minority.2003", "minority.2003", "minority.2004", "minority.2004", "minority.2005", "minority.2005","minority.2006", "minority.2006", "minority.2007", "minority.2007")                    

  # DEFINE OUTPUT OBJECTS
    mgens <- NULL # list for the Match() output
    balance <- NULL
    balance.stats <- NULL
    outputs <- NULL
    outputs.summary <- NULL
    data.matched.datasets <- NULL
    n.treated <- NULL
    n.matched <- NULL

  # START LOOP
    ptm <- proc.time() # START CLOCK
    for(i in 1:10){
    
  # DEFINE OUTCOME AND TREATMENT VARIABLE  
    outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
  
  # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
    education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
    print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check

  
  # OMIT MISSINGS
    data2 <- data[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
    data2 <- na.omit(data2)
  
  # DEFINE VARS FOR MATCHING
    Y <- data2[,outcome] # define outcome variable for GenMatch
    Tr <- data2[,treatment] # define treatment variable for GenMatch
    X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
  
  # MATCHING
    gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
    mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
    balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
    }
    proc.time() - ptm # 1223/60=20min
    save(mgens, balance, balance.stats, outputs, outputs.summary, file="matching2_2014_09_26.RData")     # load("matching2.RData")

  # Check the results
    for (i in 1:10){print(summary(mgens[[i]]))}








# MODELS 23-32: CUMULATIVE VICTIMIZATION ADDED
##############################################
    
  # FIGURE 4/TABLE 5: CHANGE SCORE ANALYSIS WITH GENETIC MATCHING
    # Use "victims" instead of original victim variables
    victims <- c("victim.2003", "victim.2003",
                  "cumulative.previous.victimization.2004", "cumulative.previous.victimization.2004", "cumulative.previous.victimization.2005", "cumulative.previous.victimization.2005",
                  "cumulative.previous.victimization.2006", "cumulative.previous.victimization.2006", "cumulative.previous.victimization.2007", "cumulative.previous.victimization.2007",
                  "cumulative.previous.victimization.2008", "cumulative.previous.victimization.2008")
    treatments <- c("threat.2004", "injury.2004", "threat.2005", "injury.2005", "threat.2006", "injury.2006", "threat.2007", "injury.2007", "threat.2008", "injury.2008") # all treatments
    outcomes <- c("trust.03.04", "trust.03.04", "trust.04.05","trust.04.05", "trust.05.06", "trust.05.06", "trust.06.07", "trust.06.07", "trust.07.08", "trust.07.08")
    male <- "male"
    age <- "age"
    educations <- c("education.2003", "education.2003", "education.2004", "education.2004", "education.2005", "education.2005", "education.2006", "education.2006", "education.2007", "education.2007")
    members <- c("member.2003", "member.2003", "member.2004", "member.2004", "member.2005", "member.2005", "member.2006", "member.2006", "member.2007", "member.2007")
    incomes <- c("income.2003", "income.2003", "income.2004", "income.2004", "income.2005", "income.2005", "income.2006", "income.2006", "income.2007", "income.2007")
    unemps <- c("unemployed.2003", "unemployed.2003", "unemployed.2004", "unemployed.2004", "unemployed.2005", "unemployed.2005", "unemployed.2006", "unemployed.2006", "unemployed.2007", "unemployed.2007")
    joblosses <- c("jobloss.03.04", "jobloss.03.04", "jobloss.04.05", "jobloss.04.05", "jobloss.05.06", "jobloss.05.06", "jobloss.06.07", "jobloss.06.07", "jobloss.07.08", "jobloss.07.08")
    minorities <- c("minority.2003", "minority.2003", "minority.2004", "minority.2004", "minority.2005", "minority.2005","minority.2006", "minority.2006", "minority.2007", "minority.2007")                    

  # DEFINE OUTPUT OBJECTS
    mgens <- NULL # list for the Match() output
    balance <- NULL
    balance.stats <- NULL
    outputs <- NULL
    outputs.summary <- NULL
    data.matched.datasets <- NULL
    n.treated <- NULL
    n.matched <- NULL

  # START LOOP
    ptm <- proc.time() # START CLOCK
    for(i in 1:10){
    
  # DEFINE OUTCOME AND TREATMENT VARIABLE  
    outcome <- outcomes[i]; treatment <- treatments[i]; print(outcome); print(treatment) 
  
  # DEFINE CONTROLVARIABLES THAT HAVE TO BE LOOPED
    education <- educations[i]; member <- members[i]; income <- incomes[i]; victim <- victims[i]; unemp <- unemps[i]; jobloss <- joblosses[i]; minority <- minorities[i]
    print(age); print(male); print(education); print(member); print(income); print(victim); print(unemp); print(jobloss); print(minority) # print em to check

  
  # OMIT MISSINGS
    data2 <- data[c(outcome, treatment, male, age, education, member, income, victim, unemp, jobloss, minority)]
    data2 <- na.omit(data2)
  
  # DEFINE VARS FOR MATCHING
    Y <- data2[,outcome] # define outcome variable for GenMatch
    Tr <- data2[,treatment] # define treatment variable for GenMatch
    X <- cbind(data2[, male],data2[, age], data2[, education], data2[, member], data2[, income], data2[, victim], data2[, unemp], data2[, jobloss], data2[, minority])   # define covariates variable for GenMatch
  
  # MATCHING
    gen1 <- GenMatch(Tr = Tr, X = X, BalanceMatrix = X, pop.size = 500, M=1, estimand="ATT", replace=TRUE)
    mgens[[i]] <- Match(Y = Y, Tr = Tr, X = X, Weight.matrix = gen1)    
    print(summary(mgens[[i]])) 
    balance[[i]] <- MatchBalance(Tr ~ X, match.out = mgens[[i]], nboots = 1000, data = data2)
    }
    proc.time() - ptm # 1223/60=20min
    save(mgens, balance, balance.stats, outputs, outputs.summary, file="matching2_2014_09_26.RData")

  # Check the results
    for (i in 1:10){print(summary(mgens[[i]]))}















    
     